Return to Data Visualisation Section
The data comes from the Tidy Tuesday project launched by the R for Data Science team. It contains state-level salary information on registered nurses over the period 1998 to 2020.
As the data already consists of aggregated statistics, there is no need for any data cleaning or aggregating. As this post is part of the data visualisation section, let’s get straight to the plots.
# Average nurse salary per year, with a shaded band between the yearly means
# of the 10th and 90th percentile salary columns.
nurses %>%
  group_by(year) %>%
  # Each statistic is an unweighted mean over all rows of that year.
  summarise(mean_salary = mean(annual.salary.avg, na.rm = TRUE),
            higher_band = mean(annual.90th.percentile, na.rm = TRUE),
            lower_band = mean(annual.10th.percentile, na.rm = TRUE)) %>%
  ggplot(aes(year, mean_salary)) +
  geom_line(colour = "dodgerblue") +
  geom_ribbon(aes(ymin = lower_band, ymax = higher_band),
              fill = "dodgerblue", colour = "dodgerblue", alpha = 0.4,
              lty = "dotted") +
  # The band is a percentile range of salaries, not a confidence interval,
  # so the subtitle names it accordingly.
  labs(title = "Average Salaries Over Time",
       subtitle = "Shaded band spans the average 10th to 90th percentiles",
       x = NULL,
       y = "Salary") +
  # Print years as plain integers, without a thousands separator.
  scale_x_continuous(labels = scales::number_format(accuracy = 1,
                                                    big.mark = "")) +
  scale_y_continuous(labels = scales::dollar_format()) +
  # Force the y-axis to include zero.
  expand_limits(y = 0) +
  theme_bw() +
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        plot.title = element_text(face = "bold", size = 12),
        plot.subtitle = element_text(face = "italic", colour = "grey50"))
# Year-over-year growth of the mean salary, drawn as one column per year.
nurses %>%
  group_by(year) %>%
  summarise(mean_salary = mean(annual.salary.avg, na.rm = TRUE)) %>%
  # Relative change against the preceding row; summarise() returns the rows
  # ordered by year, so that row is the previous year present in the data.
  mutate(change = mean_salary / lag(mean_salary) - 1) %>%
  # The first year has no predecessor and therefore no growth figure.
  filter(!is.na(change)) %>%
  ggplot(aes(year, change)) +
  geom_col(fill = "dodgerblue") +
  labs(title = "Yearly Salary Growth For Registered Nurses",
       subtitle = "Growth calculated on average salaries across states",
       x = NULL,
       y = "YoY Change in Salaries") +
  # Print years as plain integers, without a thousands separator.
  scale_x_continuous(labels = scales::number_format(accuracy = 1,
                                                    big.mark = "")) +
  scale_y_continuous(labels = scales::percent_format()) +
  expand_limits(y = 0) +
  theme_bw() +
  theme(panel.grid.minor.x = element_blank(),
        panel.grid.major.x = element_blank(),
        plot.title = element_text(face = "bold", size = 12),
        plot.subtitle = element_text(face = "italic", colour = "grey50"))
# Year-over-year employment growth: healthcare vs. the aggregate figure,
# drawn as dodged columns per year.
nurses %>%
  group_by(year) %>%
  # NOTE(review): presumably the national aggregates repeat on every row of a
  # year, so the mean recovers the single national value -- confirm.
  summarise(
    total_employment =
      mean(total.employed..national._aggregate, na.rm = TRUE),
    total_employment_healthcare =
      mean(total.employed..healthcare..national._aggregate, na.rm = TRUE)
  ) %>%
  # Relative change against the preceding row (rows are ordered by year).
  mutate(
    total_growth = total_employment / lag(total_employment) - 1,
    healthcare_growth =
      total_employment_healthcare / lag(total_employment_healthcare) - 1
  ) %>%
  select(year, total_growth, healthcare_growth) %>%
  # Drops rows without an aggregate growth figure (at least the first year).
  filter(!is.na(total_growth)) %>%
  # Human-readable series names; these become the legend entries.
  rename("Healthcare" = healthcare_growth,
         "Aggregate Economy" = total_growth) %>%
  pivot_longer(-c(year)) %>%
  ggplot(aes(year, value, fill = name)) +
  geom_col(position = "dodge") +
  labs(title = "YoY Employment Growth: Healthcare vs. Aggregate",
       subtitle = "Growth calculated on aggregate values",
       x = NULL,
       y = "YoY Change in Employment",
       fill = NULL) +
  # Print years as plain integers, without a thousands separator.
  scale_x_continuous(labels = scales::number_format(accuracy = 1,
                                                    big.mark = "")) +
  scale_y_continuous(labels = scales::percent_format()) +
  # Named values pin each colour to its series instead of relying on the
  # alphabetical order of the labels.
  scale_fill_manual(values = c("Aggregate Economy" = "midnightblue",
                               "Healthcare" = "firebrick")) +
  expand_limits(y = 0) +
  theme_bw() +
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        plot.title = element_text(face = "bold", size = 12),
        plot.subtitle = element_text(face = "italic", colour = "grey50"))
# Total number of employed registered nurses per year, summed over all rows
# of that year, drawn as a line with point markers.
nurses %>%
  group_by(year) %>%
  summarise(nurses = sum(total.employed.rn, na.rm = TRUE)) %>%
  ggplot(aes(year, nurses)) +
  geom_line(colour = "dodgerblue") +
  geom_point(colour = "dodgerblue") +
  # No fill aesthetic is mapped here, so no fill scale or fill label is set.
  labs(title = "Total Employed Registered Nurses In The US",
       subtitle = "Data from Data.World",
       x = NULL,
       y = NULL) +
  # Print years as plain integers, without a thousands separator.
  scale_x_continuous(labels = scales::number_format(accuracy = 1,
                                                    big.mark = "")) +
  scale_y_continuous(labels = scales::comma_format()) +
  # Force the y-axis to include zero.
  expand_limits(y = 0) +
  theme_bw() +
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        plot.title = element_text(face = "bold", size = 12),
        plot.subtitle = element_text(face = "italic", colour = "grey50"))
# Median nurse salary per state for 2020, as horizontal bars sorted by salary.
nurses %>%
  filter(year == 2020) %>%
  select(state, annual.salary.median) %>%
  # Reorder the state factor by median salary so the bars appear sorted.
  mutate(state = fct_reorder(state, annual.salary.median)) %>%
  ggplot(aes(annual.salary.median, state)) +
  # fill is a constant here, not a mapped aesthetic, so no fill label is set.
  geom_col(fill = "dodgerblue", colour = "white") +
  labs(title = "Nurse Salaries By US States",
       subtitle = "Data from Data.World",
       x = "Median Salary",
       y = NULL) +
  scale_x_continuous(labels = scales::dollar_format()) +
  theme_bw() +
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        plot.title = element_text(face = "bold", size = 12),
        plot.subtitle = element_text(face = "italic", colour = "grey50"))
A work by Mathias Steilen